/*
Heterogeneity in profit response by firm/family characteristics.
Splits the sample on five dimensions and estimates event studies
for each subgroup, for both women and men. Produces Tables 3, A5.

Dimensions:
- >50% ownership vs. not
- working vs. non-working spouse
- above- vs. below-average face-to-face interactions
- any employees vs. owner-operated
- grandparents live nearby vs. not
*/

* Work from the project root; the global figpath is the folder that
* receives every CSV written by this file.
cd "${root}"

global figpath "${root}/estimates/admin/heterogeneity"

****************************************************************
****************************************************************
************** Majority owner
****************************************************************
****************************************************************


* Majority-owner split. The loop runs once with male == 0 and once with
* male == 1. For each pass it rebuilds the stacked, weighted sample matched
* on owner_lead, writes the t-1 pretreatment mean of trimmed profits per
* subgroup, and estimates one event study per subgroup (0 / 1).
forvalues g = 0/1 {

    * empty the data in memory, then raise the variable limit
    clear
    set maxvar 30000

    * (re)load the project programs called below
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    use data/admin/intermediate/penalty_cohort_final.dta, clear
    keep if male == `g'

    * match on ownership structure in t-1
    * owner_lead is next row's owner_cat within (lnr, lfirm), rows ordered by
    * year. NOTE(review): this is "next year" only if each firm has one row
    * per consecutive year -- confirm the panel has no gaps.
    bysort lnr lfirm (year): gen owner_lead = owner_cat[_n+1]
    * categories above 1 are folded into 0; missing leads stay missing
    replace owner_lead = 0 if owner_lead > 1 & owner_lead != .
    * = 1 if majority owner, otherwise 0

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel operating_profits ///
        firm_age ind_cat* profit_before_self owner_lead

    * top-code firm age at 20 for matching
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    * (treated, X_wt and extra_match_X used below are presumably created
    * here -- they are not in the keep list above)
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        matchvar_m2(owner_lead) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes(profit_before_self)

    * trim profits; the outcome used below carries the _trim suffix
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies
    * NOTE(review): this section uses ind_cat2 and single-year age dummies,
    * while the spouse / face-to-face / grandparents sections use ind_cat and
    * 5-year age bins -- confirm the difference is intended.
    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat2, gen(dind)
        tab age, gen(dage)
        tab edlevel, gen(dedl)
    }

    * get pretreatment means in t-1
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(extra_match_X)
    ren extra_match_X maj_owner
    * path is quoted so a project root containing spaces does not break the export
    export delimited using "${figpath}/pretreat_means_majowner_male`g'.csv", replace
    restore

    * estimate for the outcomes where we want percentages
    forvalues m = 0/1 {
        aggregate_event_study profit_before_self_trim if extra_match_X == `m', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            store(temp) ///
            percentage ///
            save("${figpath}/profit_before_self_trim_ATT_male`g'_owner`m'.csv")
    }

}

****************************************************************
****************************************************************
************** Working spouse
****************************************************************
****************************************************************


* Working-spouse split. The loop runs once with male == 0 and once with
* male == 1. For each pass it rebuilds the stacked, weighted sample matched
* on spouse_emp_lead, drops rows with no match value, writes the t-1
* pretreatment mean of trimmed profits per subgroup, and estimates one event
* study per subgroup (0 / 1).
forvalues g = 0/1 {

    * empty the data in memory, then raise the variable limit
    clear
    set maxvar 30000

    * (re)load the project programs called below
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    use data/admin/intermediate/penalty_cohort_final.dta, clear
    keep if male == `g'

    * identify whether partner had a job in t=-1
    * spouse_emp_lead is next row's partner_employed within (lnr, lfirm), rows
    * ordered by year. NOTE(review): this is "next year" only if each firm
    * has one row per consecutive year -- confirm the panel has no gaps.
    bysort lnr lfirm (year): gen spouse_emp_lead = partner_employed[_n+1]

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel ///
        firm_age ind_cat* operating_profits ///
        spouse_emp_lead profit_before_self

    * top-code firm age at 20 for matching
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    * (treated, X_wt and extra_match_X used below are presumably created
    * here -- they are not in the keep list above)
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        matchvar_m2(spouse_emp_lead) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes(profit_before_self)
    * only rows with a non-missing match value can be assigned to a subgroup
    keep if extra_match_X != .

    * trim profits
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies (age enters in 5-year bins)
    gen age_bin = floor(age / 5)

    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat, gen(dind)
        tab age_bin, gen(dage)
        tab edlevel, gen(dedl)
    }

    * get pretreatment means in t-1
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(extra_match_X)
    ren extra_match_X partner_employed
    * path is quoted so a project root containing spaces does not break the export
    export delimited using "${figpath}/pretreat_means_partneremp_male`g'.csv", replace
    restore

    * one event study per subgroup, effects reported as percentages
    forvalues d = 0/1 {
        aggregate_event_study ///
            profit_before_self_trim if extra_match_X == `d', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            percentage ///
            store(temp) ///
            save("${figpath}/profit_before_self_trim_ATT_male`g'_partneremp`d'.csv")
    }

}


****************************************************************
****************************************************************
************** Face-to-face interactions
****************************************************************
****************************************************************


* Face-to-face split. The loop runs once with male == 0 and once with
* male == 1. Firms are classified by the O*NET "Face-to-Face Discussions"
* score of their industry (daily_ind = 1 when the score is at least 80).
* For each pass it rebuilds the stacked, weighted sample, writes the t-1
* pretreatment mean of trimmed profits per subgroup, and estimates one event
* study per subgroup (0 / 1).
forvalues g = 0/1 {

    * empty the data in memory, then raise the variable limit
    clear
    set maxvar 30000

    * (re)load the project programs called below
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    * prep the O*NET data: one row per (sic_code, byear_grp) with the score
    use data/admin/external/entrepreneur_industries_to_onet.dta, clear
    keep if element == "Face-to-Face Discussions"
    keep sic_code byear_grp daily
    ren daily daily_cust
    duplicates drop

    tempfile onet
    save `onet'

    * merge onto main data
    use data/admin/intermediate/penalty_cohort_final.dta, clear
    keep if male == `g'

    * merge on occupational details from O*NET
    * NOTE(review): firm_byear > 2009 is also true when firm_byear is missing,
    * so firms with no birth year land in the 2009 group -- confirm intended.
    gen byear_grp = 2008 if firm_byear <= 2009
    replace byear_grp = 2009 if firm_byear > 2009
    * keep(3) retains only firms whose industry is found in the O*NET file
    merge m:1 sic_code byear_grp using `onet', keep(3) nogen

    * Missing compares as larger than any number, so a missing score would
    * pass the >= 80 test and be classified as high-interaction. Firms with
    * no score cannot be classified, so drop them like unmatched firms.
    drop if missing(daily_cust)
    gen daily_ind = daily_cust >= 80 // close to mean

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel ///
        firm_age ind_cat* operating_profits ///
        profit_before_self daily_ind

    * top-code firm age at 20 for matching
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    * (treated and X_wt used below are presumably created here -- they are
    * not in the keep list above)
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        matchvar_m2(daily_ind) ///
        no_match_reweight ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes(profit_before_self)

    * trim profits
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies (age enters in 5-year bins)
    gen age_bin = floor(age / 5)

    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat, gen(dind)
        tab age_bin, gen(dage)
        tab edlevel, gen(dedl)
    }

    * get pretreatment means in t-1
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(daily_ind)
    * path is quoted so a project root containing spaces does not break the export
    export delimited using "${figpath}/pretreat_means_onet_male`g'.csv", replace
    restore

    * one event study per subgroup, effects reported as percentages
    forvalues d = 0/1 {
        aggregate_event_study ///
            profit_before_self_trim if daily_ind == `d', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            percentage ///
            store(temp) ///
            save("${figpath}/profit_before_self_trim_ATT_male`g'_dailyface`d'.csv")
    }

}


****************************************************************
****************************************************************
************** Any employees
****************************************************************
****************************************************************


* Any-employees split. The loop runs once with male == 0 and once with
* male == 1. For each pass it rebuilds the stacked, weighted sample matched
* on any_emp_lead, writes the t-1 pretreatment mean of trimmed profits per
* subgroup, and estimates one event study per subgroup (0 / 1).
forvalues g = 0/1 {

    * empty the data in memory, then raise the variable limit
    clear
    set maxvar 30000

    * (re)load the project programs called below
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    use data/admin/intermediate/penalty_cohort_final.dta, clear
    keep if male == `g'

    * match on employment in t-1
    * emp_lead is next row's n_emp_excl_owners within (lnr, lfirm), rows
    * ordered by year. NOTE(review): this is "next year" only if each firm
    * has one row per consecutive year -- confirm the panel has no gaps.
    bysort lnr lfirm (year): gen emp_lead = n_emp_excl_owners[_n+1]
    * 1 when the lead is positive and non-missing, 0 otherwise. A missing
    * lead (e.g. the last row of a firm) is therefore coded 0, not missing.
    * NOTE(review): confirm missing employment should count as no employees.
    gen any_emp_lead = emp_lead > 0 & !missing(emp_lead)

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel operating_profits ///
        firm_age ind_cat* profit_before_self any_emp_lead

    * top-code firm age at 20 for matching
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    * (treated, X_wt and extra_match_X used below are presumably created
    * here -- they are not in the keep list above)
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        matchvar_m2(any_emp_lead) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes(profit_before_self)

    * trim profits; the outcome used below carries the _trim suffix
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies
    * NOTE(review): this section uses ind_cat2 and single-year age dummies,
    * while the spouse / face-to-face / grandparents sections use ind_cat and
    * 5-year age bins -- confirm the difference is intended.
    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat2, gen(dind)
        tab age, gen(dage)
        tab edlevel, gen(dedl)
    }

    * get pretreatment means in t-1
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(extra_match_X)
    ren extra_match_X any_emp
    * path is quoted so a project root containing spaces does not break the export
    export delimited using "${figpath}/pretreat_means_anyemp_male`g'.csv", replace
    restore

    * estimate for the outcomes where we want percentages
    forvalues m = 0/1 {
        aggregate_event_study profit_before_self_trim if extra_match_X == `m', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            store(temp) ///
            percentage ///
            save("${figpath}/profit_before_self_trim_ATT_male`g'_anyemp`m'.csv")
    }

}


****************************************************************
****************************************************************
************** Grandparents nearby
****************************************************************
****************************************************************


* Grandparents-nearby split. The loop runs once with male == 0 and once with
* male == 1. For each pass it rebuilds the stacked, weighted sample matched
* on parents_lead, drops rows with no match value, writes the t-1
* pretreatment mean of trimmed profits per subgroup, and estimates one event
* study per subgroup (0 / 1).
forvalues g = 0/1 {

    * empty the data in memory, then raise the variable limit
    clear
    set maxvar 30000

    * (re)load the project programs called below
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    use data/admin/intermediate/penalty_cohort_final.dta, clear
    keep if male == `g'

    * identify whether live near parents in t=0
    * parents_lead is live_near_parents two rows ahead within (lnr, lfirm),
    * rows ordered by year (the other sections look one row ahead).
    * NOTE(review): this is "two years ahead" only if each firm has one row
    * per consecutive year -- confirm the panel has no gaps.
    bysort lnr lfirm (year): gen parents_lead = live_near_parents[_n+2]

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel ///
        firm_age ind_cat* operating_profits ///
        parents_lead profit_before_self

    * top-code firm age at 20 for matching
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    * (treated, X_wt and extra_match_X used below are presumably created
    * here -- they are not in the keep list above)
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        matchvar_m2(parents_lead) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes(profit_before_self)
    * only rows with a non-missing match value can be assigned to a subgroup
    keep if extra_match_X != .

    * trim profits
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies (age enters in 5-year bins)
    gen age_bin = floor(age / 5)

    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat, gen(dind)
        tab age_bin, gen(dage)
        tab edlevel, gen(dedl)
    }

    * get pretreatment means in t-1
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(extra_match_X)
    ren extra_match_X live_near_parents
    * path is quoted so a project root containing spaces does not break the export
    export delimited using "${figpath}/pretreat_means_nearparents_male`g'.csv", replace
    restore

    * one event study per subgroup, effects reported as percentages
    forvalues d = 0/1 {
        aggregate_event_study ///
            profit_before_self_trim if extra_match_X == `d', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            percentage ///
            store(temp) ///
            save("${figpath}/profit_before_self_trim_ATT_male`g'_nearparents`d'.csv")
    }

}

